# ruff: noqa

# This script generates a .pyi (stub) file for skrub/_data_ops/_data_ops.py.
# DataOps have attributes, in particular the methods for most operators, generated
# dynamically. This can cause static type checkers to report spurious errors
# saying that those operators are not defined for DataOp objects. To avoid this,
# we create a stub file in which we explicitly list all the dynamically
# generated attributes. The presence of a __getattr__ should also prevent
# checkers from complaining about regular (non-special, not __*__) attributes.
#
# When a stub file exists, checkers will not look at the actual module.
# Therefore besides the DataOp class, any other functions defined in the same
# module should also have their signature listed in the stub. Another option
# would be to make sure no public functions are defined in _data_ops.py,
# however that seems more constraining.
#
# For simplicity, the stub file is generated manually (by running this script)
# and checked into version control. There is a pixi task (check-pyi-diff) and a
# GitHub action that check that the output of the script has not changed by
# computing the diff between the output of this script and the content of
# skrub/_data_ops/_data_ops.pyi
#
# If the output of the script changes, for example because an attribute has
# been added to or removed from the DataOp class, the check will fail and we
# have to decide if we just need to regenerate the file or to edit the script
# (for example to exclude the new attribute from the generated annotations).
#
# Because the type of the result of a DataOp, and thus the operations that
# will be valid, is only known during execution, we don't expect that static
# checkers can produce useful output for DataOps. Therefore we just provide
# the minimal annotations to silence errors -- for example we don't detail the
# arguments but just put *args, **kwargs everywhere.
#
# A simpler solution to silence type errors would be to annotate all functions
# that create DataOps as returning Any. However, this would prevent some
# tools from providing autocompletion on the .skb attribute (both for .skb and
# for all the attributes of .skb). So we prefer to use the stub file.

import sys

# Keep a handle on the real stdout: the generated stub text is written to it
# at the very end of the script.
stdout = sys.stdout
# Point sys.stdout at stderr *before* the remaining imports run -- presumably
# so that anything printed while importing skrub cannot get mixed into the
# stub output (NOTE(review): rationale inferred from the ordering; confirm).
sys.stdout = sys.stderr

import io
import types

import skrub
from skrub._data_ops import _data_ops

# The stub is assembled in an in-memory text buffer. ``p`` is the shorthand
# used throughout the script to append a piece of text to that buffer.
file = io.StringIO()
p = file.write

# Fixed preamble of the generated stub: formatter/linter opt-outs, the
# "do not edit" banner, the imports needed by the annotations, and the opening
# of the ``DataOp`` class with its explicitly typed ``skb`` attribute.
_STUB_PREAMBLE = """\
# fmt: off
# ruff: noqa

# DO NOT EDIT THIS FILE.
# IT IS AUTOMATICALLY GENERATED BY /build_tools/generate_data_ops_stub.py

from typing import Callable

from skrub._data_ops._skrub_namespace import SkrubNamespace

class DataOp:
    skb: SkrubNamespace
"""

p(_STUB_PREAMBLE)

# Entries of the DataOp class namespace for which no method stub is emitted.
# ``skb`` is in this set because the stub header already declares it as a
# typed attribute; the other names are simply left out of the generated
# annotations.
_NOT_STUBBED = frozenset(
    {
        "__bool__",
        "__contains__",
        "__dict__",
        "__doc__",
        "__firstlineno__",
        "__hash__",
        "__init__",
        "__iter__",
        "__module__",
        "__setattr__",
        "__setitem__",
        "__signature__",
        "__static_attributes__",
        "__weakref__",
        "skb",
    }
)

# Every remaining attribute defined directly on DataOp gets a catch-all
# signature returning DataOp. Names are sorted so the output is deterministic.
for attr_name in sorted(vars(skrub.DataOp)):
    if attr_name in _NOT_STUBBED:
        continue
    p(f"    def {attr_name}(*args, **kwargs) -> DataOp: ...\n")

# Blank line separating the DataOp class body from the module-level functions.
p("\n")

# Public functions of _data_ops that must not be annotated as returning a
# DataOp; ``deferred`` gets its own signature below.
not_a_data_op_factory = ["deferred"]

# Names that exist both in the _data_ops module and in the top-level skrub
# namespace; sorted so the generated file is deterministic.
shared_names = sorted(vars(_data_ops).keys() & vars(skrub).keys())

for func_name in shared_names:
    candidate = getattr(_data_ops, func_name)
    # Only plain Python functions that skrub re-exports as the very same
    # object are treated as DataOp factories.
    is_reexported_function = candidate is getattr(
        skrub, func_name
    ) and isinstance(candidate, types.FunctionType)
    if is_reexported_function and func_name not in not_a_data_op_factory:
        p(f"def {func_name}(*args, **kwargs) -> DataOp: ...\n")

# ``deferred`` is annotated as returning a Callable rather than a DataOp.
p("def deferred(*args, **kwargs) -> Callable: ...\n")

# Emit the assembled stub on the real stdout saved at the top of the script
# (sys.stdout itself was redirected to stderr).
stdout.write(file.getvalue())
